# -------------------------------------------------------------------
# Purpose: Creates Table 5
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Abort early unless both the analysis-data and output directories exist.
# `pdataanalysis` and `poutput` are not defined in this script; they must
# already be present in the calling environment.
stopifnot(
  dir.exists(pdataanalysis),
  dir.exists(poutput)
)


# Restore the saved workspace objects from the analysis-data directory.
# The code below expects this to provide `countyLevel19001940`.
load(file = file.path(pdataanalysis, "countyLevel19001940.RData"))


# Regressions
# Panel A models: outcome is `sum_patents_pc_1900_f_w`. Every specification
# controls for `sum_n_namelast_mp_adjp_ws` and absorbs `gisjoin_1900` and
# `statefip^year` fixed effects; the last one additionally includes
# `gisjoin_1900[year]` (county-specific slopes on year).
o <- list(
  # (1) Surname diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (2) Birth-country diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_cob_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (3) Surname + birth-country diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_cob_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (4) Ancestral-country diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_coo_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (5) Surname + ancestral-country diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_coo_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (6) Racial diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_race_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (7) Surname + racial diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_race_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (8) Occupational diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_occ1950_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (9) Surname + occupational diversity
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_occ1950_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (10) Surname diversity + all columns matching the bpl share pattern
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      regex("bpl_[0-9]{3}_share_ws") |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (11) All five diversity measures + bpl shares
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_cob_adjp_ws +
      entropy_coo_adjp_ws + entropy_race_adjp_ws +
      entropy_occ1950_adjp_ws + sum_n_namelast_mp_adjp_ws +
      regex("bpl_[0-9]{3}_share_ws") |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (12) As (11), adding county-specific slopes on year
  feols(
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_cob_adjp_ws +
      entropy_coo_adjp_ws + entropy_race_adjp_ws +
      entropy_occ1950_adjp_ws + sum_n_namelast_mp_adjp_ws +
      regex("bpl_[0-9]{3}_share_ws") |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    countyLevel19001940
  )
)

# Quick look at the raw (unlabelled) results, clustered by state.
# NOTE(review): when this file is run through source() with its default
# echo = FALSE, this top-level value is not auto-printed; wrap it in
# print() if console output is wanted.
etable(o, cluster = ~statefip, dict = FALSE)

# Panel B models: same twelve specifications as in `o`, with
# `sum_break_p80_rrfsim05_pc_1900_f_w` as the outcome.
p <- list(
  # (1) Surname diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (2) Birth-country diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_cob_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (3) Surname + birth-country diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_cob_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (4) Ancestral-country diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_coo_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (5) Surname + ancestral-country diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_coo_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (6) Racial diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_race_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (7) Surname + racial diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_race_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (8) Occupational diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_occ1950_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (9) Surname + occupational diversity
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_occ1950_adjp_ws +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (10) Surname diversity + all columns matching the bpl share pattern
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      regex("bpl_[0-9]{3}_share_ws") |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (11) All five diversity measures + bpl shares
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_cob_adjp_ws +
      entropy_coo_adjp_ws + entropy_race_adjp_ws +
      entropy_occ1950_adjp_ws + sum_n_namelast_mp_adjp_ws +
      regex("bpl_[0-9]{3}_share_ws") |
      gisjoin_1900 + statefip^year,
    countyLevel19001940
  ),
  # (12) As (11), adding county-specific slopes on year
  feols(
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws + entropy_cob_adjp_ws +
      entropy_coo_adjp_ws + entropy_race_adjp_ws +
      entropy_occ1950_adjp_ws + sum_n_namelast_mp_adjp_ws +
      regex("bpl_[0-9]{3}_share_ws") |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    countyLevel19001940
  )
)

# Quick look at the raw (unlabelled) results, clustered by state.
# NOTE(review): when this file is run through source() with its default
# echo = FALSE, this top-level value is not auto-printed; wrap it in
# print() if console output is wanted.
etable(p, cluster = ~statefip, dict = FALSE)


# Create table
# Mean and standard deviation (rounded to 2 decimals) of the two outcome
# variables. These four values are not referenced again in the visible part
# of this script.
# Missing values are dropped explicitly: without `na.rm = TRUE` a single NA
# in the outcome column would turn the statistic into NA.
# NOTE(review): the statistics use all non-missing rows of the data, which
# may differ from the estimation samples if feols drops further rows.
# `[[` is used instead of `$` so the column name must match exactly.
y1_mean <- round(mean(countyLevel19001940[["sum_patents_pc_1900_f_w"]], na.rm = TRUE), 2)
y1_sd <- round(sd(countyLevel19001940[["sum_patents_pc_1900_f_w"]], na.rm = TRUE), 2)
y2_mean <- round(mean(countyLevel19001940[["sum_break_p80_rrfsim05_pc_1900_f_w"]], na.rm = TRUE), 2)
y2_sd <- round(sd(countyLevel19001940[["sum_break_p80_rrfsim05_pc_1900_f_w"]], na.rm = TRUE), 2)

# Display labels for the two outcomes (used in the fixest dictionary below).
y1 <- "Patents per 1,000 people"
y2 <- "Breakthrough patents per 1,000 people"

# Register the display labels that etable() substitutes for variable names.
setFixest_dict(c(
  # Diversity measures
  entropy_namelast_mp_adjp_ws = "Surname diversity",
  entropy_race_adjp_ws = "Racial diversity",
  entropy_cob_adjp_ws = "Birth-country diversity",
  entropy_coo_adjp_ws = "Ancestral-country diversity",
  entropy_occ1950_adjp_ws = "Occupational diversity",
  # Control variable
  sum_n_namelast_mp_adjp_ws = "Population",
  # Outcomes (labels defined in y1 / y2)
  sum_patents_pc_1900_f_w = y1,
  sum_break_p80_rrfsim05_pc_1900_f_w = y2,
  # Fixed-effect dimensions
  year = "Period",
  statefip = "State",
  gisjoin_1900 = "County"
))

# Panel A: export the patent models (`o`) to the final LaTeX file, with
# standard errors clustered by state.
tablename <- file.path(poutput, "table05.tex")
etable(o,
  cluster = ~statefip,
  order = c("Surname", "Birth", "Ancestral", "Racial", "Occupation", "Population"),
  group = list("-_Immigrant shares by 59 birth countries" = c("bpl")),
  fitstat = ~ r2 + n, digits = "r3", digits.stats = "r3",
  file = tablename, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)

# Post-process the exported file with the project's table helpers (defined
# outside this script).
edit_table_content_fixed(tablename, "Period $\\times $ County", "County-specific linear trends")

# The row to remove and the panel header both carry the outcome label, so
# they are taken from `y1` instead of repeating the literal text.
remove_table_row(tablename, y1)

# The header spans one column per model and the rule additionally covers the
# leading label column; both widths are derived from the number of models in
# `o` so they stay correct if specifications are added or removed.
add_table_row(
  tablename, "\\midrule",
  c(
    sprintf("\\textit{Panel A:} & \\multicolumn{%d}{c}{%s}\\\\", length(o), y1),
    sprintf("\\cmidrule(lr){1-%d}", length(o) + 1L)
  )
)
move_table_row(tablename, "Observations", "bottomrule")

# Panel B: export the breakthrough-patent models (`p`) to a scratch file and
# splice the needed rows into the final table.
# The scratch file lives in the session temp directory rather than in
# `poutput`, so an existing "temp.tex" there is never overwritten and no
# residue is left in the output folder if a later step fails.
temptable <- tempfile(pattern = "table05_panel_b_", fileext = ".tex")
etable(p,
  cluster = ~statefip,
  order = c("Surname", "Birth", "Ancestral", "Racial", "Occupation", "Population"),
  group = list("-_Immigrant shares by 59 birth countries" = c("bpl")),
  fitstat = ~ r2 + n, digits = "r3", digits.stats = "r3",
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)

# Project helper (defined outside this script); presumably returns the
# coefficient rows of the scratch table -- confirm against its definition.
estimates_rows <- get_estimates_rows(temptable)

# Insert the Panel B header and estimates relative to the R2 row of the final
# table. Header text and column span come from `y2` and the number of models
# in `p` instead of hard-coded literals.
add_table_row(
  tablename, "R\\$\\^2\\$",
  c(
    "\\\\",
    sprintf("\\textit{Panel B:} &  \\multicolumn{%d}{c}{%s}\\\\", length(p), y2),
    sprintf("\\cmidrule(lr){1-%d}", length(p) + 1L),
    estimates_rows
  )
)

# Copy the R2 row of the scratch table and place it, followed by a spacer
# line, before the "County fixed effects" row of the final table.
r2_row <- get_table_row(temptable, "R\\$\\^2\\$")
add_table_row(tablename, "County fixed effects", c(r2_row, " \\\\"), "before")
file.remove(temptable)

# Tell the user where the finished table was written.
cat(paste("Table 5 saved to:", tablename, "\n"))